library("FactoMineR")
library("dplyr")
library("factoextra")
library("Matrix.utils")
library("corrplot")
library("viridis")
library("missMDA")
library("mdatools")
library("dummies")
library("rgl")
library("pca3d")

###### Help & Color Palettes ##############
# https://www.nceas.ucsb.edu/sites/default/files/2020-04/colorPaletteCheatsheet.pdf
# http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/ #######

###########################################
################# PCA data ################
###########################################
# Load Spreadsheets
# `dec = ","` tells read.csv that the file uses a decimal comma, so cleanly
# numeric columns are parsed as numbers right away.
# NOTE: the former `rm(list = ls())` was dropped. It wiped the caller's whole
# workspace, and every object this script uses is (re)created below anyway.
data <- read.csv("~/Desktop/PCA CSVs/Fig.5_Ceramide_data_Cers_&_DHC_v2.csv", header = TRUE, dec = ",")

# Print up to 9 significant digits. This only changes how numbers are
# displayed; it has no influence on what as.numeric() returns.
options(digits = 9)

# Columns 4-30 hold the measurements that go into the PCA.
measurement_cols <- 4:30
stopifnot(ncol(data) >= max(measurement_cols))

# Flags which measurement columns were read in as factors.
char_columns <- vapply(data[measurement_cols], is.factor, logical(1))

# Convert one column to numeric *by value*.
# - as.numeric() on a factor returns the internal level codes, so factors are
#   turned into character first.
# - Columns that stayed text (because of a stray non-numeric entry) still carry
#   decimal commas, which as.numeric() cannot parse; swap them for a dot.
# Entries that are not numbers become NA (with the usual coercion warning).
to_numeric <- function(x) {
  if (is.factor(x)) {
    x <- as.character(x)
  }
  if (is.character(x)) {
    x <- sub(",", ".", trimws(x), fixed = TRUE)
  }
  as.numeric(x)
}

# Numeric matrix of the measurement columns (one column per variable).
data_chars_as_num <- vapply(data[measurement_cols], to_numeric, numeric(nrow(data)))

# Storage mode of every converted measurement column (one entry per column)
vapply(as.data.frame(data_chars_as_num), mode, character(1))
# Storage mode of the metadata columns 1-3
vapply(data[1:3], mode, character(1))

# First two metadata columns (kept as they were read) + numeric measurements
df <- data.frame(data[1:2], data_chars_as_num)
df[is.na(df)] <- 0 # replace NA with 0

# FactoMineR PCA on unit-scaled variables, keeping 3 components.
# Columns 1-2 are declared qualitative supplementary variables (meta data,
# e.g. sex, age, mobility, BMI); no graph is drawn.
pca1 <- PCA(
  df,
  scale.unit = TRUE,
  ncp = 3,
  quali.sup = 1:2,
  graph = FALSE
)
summary(pca1)

# Save the PCA results as a csv file
write.infile(
  pca1,
  "~/Desktop/PCA CSVs/Fig.5_Ceramide_data_Cers_&_DHC_v2_summary.csv",
  sep = ","
)

# Same analysis with stats::prcomp on the measurement columns (3-29), scaled
pca <- prcomp(df[, 3:29], scale. = TRUE)

# Summary information from the prcomp object
s <- summary(pca)

# Print the full prcomp result as a plain list to see how the biplot is built.
# The most relevant parts are $rotation (matrix of variable loadings) and
# $x (scores of the observations).
unclass(pca)


#### For plotting only ####
# Rebuild the PCA input with "Group" as the only metadata column, followed by
# the numeric measurement columns.
df <- data.frame(data["Group"], data_chars_as_num)
df[is.na(df)] <- 0 # replace NA with 0

# "Group" is the first column of this data frame, so that is the column that
# has to be declared as qualitative supplementary variable. The previous
# `quali.sup = 3` pointed at a measurement column and left Group in the
# active variables.
group_col <- match("Group", names(df))
pca1 <- PCA(df, scale.unit = TRUE, ncp = 3, quali.sup = group_col, graph = FALSE)
summary(pca1)



# One colour per group, passed to pca3d as the palette
mycolor <- c("indianred1", "pink", "royalblue1", "lightblue")

# Interactive 3D plot of the prcomp result, coloured by df$Group.
# Fixes: `title` was set to the undefined object `Null` (R's null object is
# `NULL`), and the call ended in an empty trailing argument.
pca3d(pca, group = df$Group,
      title = NULL,
      show.plane = FALSE,
      #show.labels=df$Diet,
      labels.col = "black",
      palette = mycolor,
      radius = 1.5,
      biplot.vars = 2,
      legend = "right",
      show.centroids = TRUE, # show cluster centroids and lines from each data point to the corresponding centroid
      show.group.labels = FALSE,
      show.shadows = TRUE,
      # NOTE(review): per the pca3d documentation `ellipse.ci` is the confidence
      # level for the ellipsoids, not an on/off switch; `show.ellipses = TRUE`
      # may have been intended here. Value kept unchanged - confirm.
      ellipse.ci = TRUE
      #axis.titles=
)

# Export the current rgl scene as SVG
rgl.postscript("~/Desktop/PCA Ben/PCA_diet_sex.svg", fmt="svg")

# Scree plot of the prcomp result (first 24 components, drawn as lines)
screeplot(pca, npcs = 24, type = "lines", ylim = c(0, 30))

# Loadings Plot: Barchart
# Two stacked bar charts (PC1 on top, PC2 below) written to a TIFF file.
tiff(
  "~/Documents/2. Studies/PHD/Data/3. Proteomics/Abundance Ratio Spreadsheets/PCA/20200806_muscle_pc1+pc2_loadingplot.tiff",
  units = "cm", width = 23, height = 15, res = 600, compression = "lzw"
)

# 2 rows x 1 column layout with tight margins
par(mfrow = c(2, 1), mar = c(3, 1.5, 0.5, 0.1))

# --- PC1 ---
loadings_pc1 <- pca$rotation[, 1]
positive_pc1 <- loadings_pc1 > 0
# y position of each variable label: just below zero for positive bars,
# just below the bar end for negative bars
n.pc1 <- ifelse(positive_pc1, yes = -0.005, no = loadings_pc1 - 0.001)
# green for positive loadings, red for negative ones
c.pc1 <- ifelse(positive_pc1, yes = "green2", no = "red2")

b1 <- barplot(
  loadings_pc1,
  main = "PC1 Loading Plot", las = 3,
  border = c.pc1, col = c.pc1,
  ylim = c(-0.32, 0.32), width = 0.2, space = 0.5, xaxs = "i",
  axisnames = FALSE, cex.axis = 0.5, cex.main = 0.7
)
abline(h = 0) # add horizontal line at zero
# Add variable names, rotated by 90 degrees
text(x = b1, y = n.pc1, labels = names(loadings_pc1), adj = 1, srt = 90, xpd = TRUE, cex = 0.6)

# --- PC2 ---
loadings_pc2 <- pca$rotation[, 2]
positive_pc2 <- loadings_pc2 > 0
n.pc2 <- ifelse(positive_pc2, yes = -0.005, no = loadings_pc2 - 0.001)
c.pc2 <- ifelse(positive_pc2, yes = "green2", no = "red2")

# Default y axis is suppressed here and drawn manually below
b2 <- barplot(
  loadings_pc2,
  main = "PC2 Loading Plot", las = 3,
  border = c.pc2, col = c.pc2, yaxt = "n",
  ylim = c(-0.32, 0.32), width = 0.2, space = 0.5, xaxs = "i",
  axisnames = FALSE, cex.main = 0.7
)
axis(2, at = seq(-0.32, 0.32, 0.02), las = 3, cex.axis = 0.5)
abline(h = 0) # add horizontal line at zero
# Add variable names, rotated by 90 degrees
text(x = b2, y = n.pc2, labels = names(loadings_pc2), adj = 1, srt = 90, xpd = TRUE, cex = 0.6)

# Close the TIFF device so the file is written
dev.off()

#########################################################################
############### Create Screeplot & Cumulative variance plot #############
#########################################################################
# proportion of variation explained by each eigenvalue = given in second column, cumulative percentage is given in third column
# first and second eigenvalue added up i.e. explain 39.13% of the variation
# Eigenvalue > 1 indicates that the PC accounts for more variance than one of the original variables in standardized data

# Save the scree plot as TIFF.
# The device used to be opened and closed without any plotting call in
# between, which wrote a blank image. The plot is wrapped in print() because
# ggplot objects are not drawn automatically when the script is run through
# source().
# NOTE(review): fviz_eig() on `pca1` is an assumption based on the file name
# ("pca1_screeplot") - confirm this is the intended figure.
tiff("~/Documents/2. Studies/PHD/Data/3. Proteomics/Abundance Ratio Spreadsheets/PCA/20200804_nmj_pca1_screeplot_onlyprotdata.tiff", units="cm", width=15, height=15, res=600, compression = 'lzw')

print(fviz_eig(pca1, addlabels = TRUE))

dev.off()

### GRAPH - save graph
tiff("~/Documents/2. Studies/PHD/Data/3. Proteomics/Abundance Ratio Spreadsheets/PCA/20200703_nmj_pca1_agegroups_0.84coscutoff_variables.tiff", units="cm", width=15, height=15, res=600, compression = 'lzw')

# run only this part to create graph
# Biplot of the prcomp result: individuals filled by sex, variables filtered
# by cos2.
# Diet and Sex are read from `data` rather than `df`: `df` is rebuilt earlier
# in the script with only Group + measurement columns, so df$Diet / df$Sex
# are NULL by the time this runs.
# NOTE(review): assumes `data` has columns named "Diet" and "Sex" - confirm.
liver_biplot <- fviz_pca(pca, title = "Liver Ceramides",
         # xlab = "PC1 (22.7%)", ylab = "PC2 (16.8%)",
         # geom.ind = "point",
         repel = TRUE, # Avoid text overlapping
         select.var = list(cos2 = 0.8),
         col.var = "grey43",
         labels = data$Diet,
         fill.ind = data$Sex,
         pointshape = 21,
         pointsize = "cos2",
         col.ind = "black",
         palette = c("#00AFBB", "#E7B800", "#FC4E07"),
         addEllipses = TRUE, # Concentration ellipses
         # ellipse.type = "confidence", # Ellipse shows confidence interval,
         mean.point = FALSE, # remove mean point of group
         legend.title = "Sex"
) +
  theme(axis.title = element_text(size = 13),
        axis.text = element_text(size = 12),
        text = element_text(size = 13),
        plot.title = element_text(hjust = 0.5)
  )

# ggplot objects are only drawn when printed; without an explicit print() the
# TIFF stays empty when the script is run through source().
print(liver_biplot)

dev.off() # necessary to save graph



#---------
  
  
  